home *** CD-ROM | disk | FTP | other *** search
/ Amiga Format CD 7 / Amiga Format AFCD07 (Dec 1996, Issue 91).iso / serious / shareware / comms / internet / html-related / hsc / source / hsclib / skip.c < prev    next >
C/C++ Source or Header  |  1996-08-03  |  17KB  |  629 lines

  1. /*
  2.  * skip.c
  3.  *
  4.  * functions for skipping several things
  5.  *
  6.  * Copyright (C) 1995,96 Thomas Aglassinger
  7.  *
  8.  * This program is free software; you can redistribute it and/or modify
  9.  * it under the terms of the GNU General Public License as published by
  10.  * the Free Software Foundation; either version 2 of the License, or
  11.  * (at your option) any later version.
  12.  *
  13.  * This program is distributed in the hope that it will be useful,
  14.  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15.  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16.  * GNU General Public License for more details.
  17.  *
  18.  * You should have received a copy of the GNU General Public License
  19.  * along with this program; if not, write to the Free Software
  20.  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  21.  *
  22.  * updated: 30-Jul-1996
  23.  * created:  8-Oct-1995
  24.  */
  25.  
  26. #define NOEXTERN_HSCLIB_SKIP_H
  27.  
  28. #include "hsclib/inc_base.h"
  29.  
  30. #include "hsclib/input.h"
  31. #include "hsclib/skip.h"
  32.  
  33. /* debug skip */
  34. #if 1
  35. #define DS(x) if(hp->debug) x
  36. #else
  37. #define DS(x)
  38. #endif
  39. #define DHLS "*hsclib* skip: "
  40.  
  41. /*
  42.  * skip_lf
  43.  *
  44.  * ignore '\n'
  45.  *
  46.  * params: inpf...input file to read char from
  47.  * result: TRUE if skipped
  48.  */
  49. BOOL skip_lf(HSCPRC * hp)
  50. {
  51.     /*
  52.      * TODO: check, why skip_lf() should not be allowed to
  53.      *   change the infp's current word
  54.      */
  55.     /* TODO: skip white-spaces after linefeed, if COMPACT set */
  56.     INFILE *inpf = hp->inpf;
  57.  
  58. #if 0
  59.     int nc = infgetc(inpf);
  60.  
  61.     if (nc != '\n')
  62.         inungetc(nc, inpf);
  63.  
  64.     return ((BOOL) (nc == EOF));
  65. #else
  66.     BOOL skipped = FALSE;
  67.     STRPTR nw = infgetw(inpf);
  68.  
  69.     if (nw)
  70.         if (strcmp(nw, "\n"))
  71.             inungetcwws(inpf);
  72.         else
  73.         {
  74.             if (hp->compact && !(hp->inside_pre))
  75.             {
  76.                 if (infskip_ws(inpf))
  77.                 {
  78.                     DMSG("skipped whtspc after LF");
  79.                 }
  80.             }
  81.             skipped = TRUE;
  82.         }
  83.  
  84.     return skipped;
  85. #endif
  86. }
  87.  
  88. /*
  89.  * skip_lfs
  90.  *
  91.  * skip until any <> LF is found
  92.  *
  93.  * params: inpf...input file to read(s) char from
  94.  * result: TRUE if skipped at least one LF
  95.  */
  96. BOOL skip_lfs(HSCPRC * hp)
  97. {
  98.     BOOL skipped_any = FALSE;
  99.     INFILE *inpf = hp->inpf;
  100.     STRPTR nw = infgetw(inpf);
  101.  
  102.     /* skip linefeeds */
  103.     while (nw && (!strcmp(nw, "\n")))
  104.     {
  105.  
  106.         skipped_any = TRUE;
  107.         nw = infgetw(inpf);
  108.  
  109.     }
  110.  
  111.     /* write back last white spaces and word */
  112.     if (nw)
  113.         inungetcwws(inpf);
  114.  
  115.     return (skipped_any);
  116. }
  117.  
  118. BOOL eot_reached(HSCPRC * hp, BYTE * state)
  119. {
  120.     INFILE *inpf = hp->inpf;
  121.     STRPTR nw = infgetw(inpf);
  122.  
  123.     if (nw)
  124.     {
  125.  
  126.         switch (*state)
  127.         {
  128.  
  129.         case TGST_TAG:
  130.             if (!strcmp(nw, "\""))
  131.                 *state = TGST_DQUOTE;
  132.             else if (!strcmp(nw, "'"))
  133.                 *state = TGST_QUOTE;
  134. /* TODO: skip references & expressions */
  135. #if 0
  136.             else if (!strcmp(nw, "<"))
  137.                 *state = TGST_REF;
  138. #endif
  139.             else if (!strcmp(nw, ">"))
  140.                 *state = TGST_END;
  141.             break;
  142.  
  143.         case TGST_REF:
  144.         case TGST_QUOTE:
  145.         case TGST_DQUOTE:
  146.  
  147.             if (strcmp(nw, "\n"))
  148.             {
  149.  
  150.                 switch (*state)
  151.                 {
  152.  
  153.                 case TGST_REF:
  154.                     if (!strcmp(nw, ">"))
  155.                         *state = TGST_TAG;
  156.                     break;
  157.  
  158.                 case TGST_QUOTE:
  159.                     if (!strcmp(nw, "'"))
  160.                         *state = TGST_TAG;
  161.                     break;
  162.  
  163.                 case TGST_DQUOTE:
  164.                     if (!strcmp(nw, "\""))
  165.                         *state = TGST_TAG;
  166.                     break;
  167.                 }
  168.  
  169.             }
  170.             else
  171.             {
  172.  
  173.                 /* unexpected end of line */
  174.                 hsc_msg_eol(hp);
  175.                 *state = TGST_TAG;      /* go on reading inside tag */
  176.  
  177.             }
  178.  
  179.             break;
  180.  
  181.         }
  182.     }
  183.     else
  184.     {
  185.  
  186.         hsc_msg_eof(hp, "`>' expected");
  187.         *state = TGST_ERR;
  188.  
  189.     }
  190.  
  191.     return ((BOOL) ((*state == TGST_END) || (*state == TGST_ERR)));
  192. }
  193.  
  194. /*
  195.  * skip_until_eot_args
  196.  *
  197.  * skip until end of tag reached,
  198.  * with user definable status vars
  199.  *
  200.  * params: inpf.....input file
  201.  *         quote....status for quote (TRUE=inside quote)
  202.  *         dquote...status for double quote
  203.  *         argattr..status for quote
  204.  * result: TRUE, if no fatal error
  205.  * errors: return FALSE
  206.  */
  207. BOOL skip_until_eot_state(HSCPRC * hp, BYTE * state, EXPSTR * logstr)
  208. {
  209.     INFILE *inpf = hp->inpf;
  210.  
  211.     while (!eot_reached(hp, state))
  212.         if (logstr)
  213.         {
  214.  
  215.             app_estr(logstr, infgetcws(inpf));
  216.             app_estr(logstr, infgetcw(inpf));
  217.  
  218.         }
  219.  
  220.     /* append ">" */
  221.     if (logstr)
  222.     {
  223.  
  224.         app_estr(logstr, infgetcws(inpf));
  225.         app_estr(logstr, infgetcw(inpf));
  226.  
  227.     }
  228.  
  229.     return ((BOOL) ! (hp->fatal));
  230. }
  231.  
  232. /*
  233.  * skip_until_eot
  234.  *
  235.  * skip until end of tag reached
  236.  *
  237.  * params: inpf..input file
  238.  * result: TRUE, if no fatal error
  239.  * errors: return FALSE
  240.  */
  241. BOOL skip_until_eot(HSCPRC * hp, EXPSTR * logstr)
  242. {
  243.     BYTE state = TGST_TAG;
  244.  
  245.     return (skip_until_eot_state(hp, &state, logstr));
  246. }
  247.  
  248. /*
  249.  *-----------------
  250.  * skip comment
  251.  *-----------------
  252.  */
  253.  
  254. /*
  255.  * eoc_reched
  256.  *
  257.  * check if end of an hsc-comment is reached
  258.  *
  259.  * params:
  260.  *   inpf...where to read next word from
  261.  *   state..state var; has to be initiales by
  262.  *          calling func with CMST_TEXT
  263.  *   nest...comment netsing counter; has to be
  264.  *          initiales by calling func with 0
  265.  * result: TRUE, if end of comment reached
  266.  */
  267. BOOL eoc_reached(HSCPRC * hp, BYTE * state, LONG * nest)
  268. {
  269.     INFILE *inpf = hp->inpf;
  270.     STRPTR nw = infgetw(inpf);
  271.  
  272.     if (nw)
  273.     {
  274.  
  275.         switch (*state)
  276.         {
  277.  
  278.         case CMST_TEXT:
  279.             if (!strcmp(nw, "*"))
  280.                 *state = CMST_STAR;
  281.             else if (!strcmp(nw, "<"))
  282.                 *state = CMST_TAG;
  283.             break;
  284.  
  285.         case CMST_STAR:
  286.             if (!strcmp(nw, "*"))
  287.                 *state = CMST_STAR;
  288.             else if (!strcmp(nw, "<"))
  289.                 *state = CMST_TAG;
  290.             else if (!strcmp(nw, ">"))
  291.                 if (*nest)
  292.                 {
  293.  
  294.                     (*nest)--;
  295.                     *state = CMST_TEXT;
  296.  
  297.                 }
  298.                 else
  299.                     *state = CMST_END;
  300.  
  301.             break;
  302.  
  303.         case CMST_TAG:
  304.             if (!strcmp(nw, "<"))
  305.                 *state = CMST_TAG;
  306.             else
  307.             {
  308.  
  309.                 if (!strcmp(nw, "*"))
  310.                     (*nest)++;
  311.                 *state = CMST_TEXT;
  312.  
  313.             }
  314.             break;
  315.  
  316.         }
  317.     }
  318.     else
  319.     {
  320.  
  321.         hsc_msg_eof(hp, "missing end of comment (\"*>\")");
  322.         *state = CMST_ERR;
  323.  
  324.     }
  325.  
  326.     return ((BOOL) ((*state == CMST_END) || (*state == CMST_ERR)));
  327. }
  328.  
  329. /*
  330.  * skip_hsc_comment
  331.  *
  332.  * skip text until '*>' occures;
  333.  * nested commets are supported
  334.  *
  335.  */
  336. BOOL skip_hsc_comment(HSCPRC * hp)
  337. {
  338.     BYTE cstate = CMST_TEXT;    /* vars for eoc_reached() */
  339.     LONG cnest = 0;
  340.     BOOL end = FALSE;           /* end of comment reached? */
  341.  
  342.     while (!end && !(hp->fatal))
  343.     {
  344.  
  345.         end = eoc_reached(hp, &cstate, &cnest);
  346.  
  347.     }
  348.  
  349.     return ((BOOL) ! (hp->fatal));
  350. }
  351.  
  352. /*
  353.  * skip_until_tag
  354.  *
  355.  * skip everythin, until a specific tag (one of tagstoplist or tagnest)
  356.  * is found.
  357.  *
  358.  * params:
  359.  *  hp hsc-process to work with
  360.  *  tagfound     destination string that will store name of tag that lead
  361.  *               to abortion of skip (eg "$else"); if this string is NULL,
  362.  *               it will be ignored
  363.  *  tagstoplist  list of tags to stop on, sparated with vertical bars `|'
  364.  *               eg. "$else|$elseif"
  365.  *  tagnest      single tag, that maintains a nesting-counter, depending
  366.  *               on wheter it occures as a start-tag or not; if the
  367.  *               nesting-counter is 0 and it occures as a stop-tag, it
  368.  *               will also stop skipping (eg "$if")
  369.  */
  370. #define STATE_TEXT            1 /* normal text */
  371. #define STATE_TAG             2 /* after "<" */
  372. #define STATE_COMMENT         3 /* inside hsc-comment */
  373. #define STATE_COMMENT_STAR    4 /* inside hsc-comment, after "*" */
  374. #define STATE_TAG_STOP        5 /* found tag in stoplist */
  375. #define STATE_SKIP            6 /* inside `skip section' "<|..|>" */
  376. #define STATE_VBAR            7 /* inside `skip section', after "|" */
  377. #define STATE_TAGNAME         8 /* after tagname */
  378. #define STATE_ENDTAGNAME      9 /* after end-tagname */
  379.  
  380. #define STATE_TAGATTR        10 /* parsing tag-attribs */
  381. #define STATE_TAGATTR_EQ     11 /* "=" inside tag */
  382. #define STATE_TAGATTR_DQUOTE 12 /* double quote after "=" inside tag */
  383. #define STATE_TAGATTR_SQUOTE 13 /* single quote after "=" inside tag */
  384. #define STATE_COMMENT_TAG    14 /* found "<" inside comment (nest comment) */
  385.  
  386. #define STATE_EXIT_ERROR_EOF 99 /* unexpected eof */
  387.  
  388. BOOL skip_until_tag(HSCPRC * hp, EXPSTR * tagfound, STRPTR tagstoplist, STRPTR tagnest)
  389. {
  390.     UBYTE state = STATE_TEXT;   /* */
  391.     INFILE *inpf = hp->inpf;    /* input file */
  392.     LONG nesting = 0;           /* tag-nesting */
  393.     LONG nesting_comment = 0;   /* comment-nesting */
  394.     STRPTR nw = NULL;
  395.     BOOL quit = FALSE;          /* flag: exit from skipping */
  396.     EXPSTR *ungetstr = init_estr(32);
  397.  
  398.     clr_estr(tagfound);
  399.  
  400.     do
  401.     {
  402.         /* get next word or tag-id */
  403.         if (state != STATE_TAG)
  404.             nw = infgetw(inpf);
  405.         else
  406.         {
  407.             nw = infget_tagid(hp);
  408.             if (nw)
  409.             {
  410.                 app_estr(ungetstr, infgetcws(inpf));
  411.                 app_estr(ungetstr, infgetcw(inpf));
  412.                 if (strcmp(nw, "/"))
  413.                 {
  414.                     DS(fprintf(stderr, DHLS "start-tag <%s>\n", nw));
  415.                     state = STATE_TAGNAME;      /* tag detected */
  416.                 }
  417.                 else
  418.                 {
  419.                     nw = infget_tagid(hp);
  420.                     DS(fprintf(stderr, DHLS "end-tag </%s>\n", nw));
  421.                     app_estr(ungetstr, infgetcws(inpf));
  422.                     app_estr(ungetstr, infgetcw(inpf));
  423.                     state = STATE_ENDTAGNAME;   /* end-tag detected */
  424.                 }
  425.             }
  426.         }
  427.  
  428.         if (nw)
  429.         {
  430.             switch (state)
  431.             {
  432.  
  433.                 /* check if tag starts */
  434.             case STATE_TEXT:
  435.                 if (!strcmp(nw, "<"))
  436.                 {
  437.                     DS(fprintf(stderr, DHLS "tag\n"));
  438.                     set_estr(ungetstr, nw);
  439.                     state = STATE_TAG;
  440.                 }
  441.                 break;
  442.  
  443.                 /* check which tag it is and how to act */
  444.             case STATE_TAGNAME:
  445.                 {
  446.                     /* check, if nesting-tag should be incr. */
  447.                     if (!upstrcmp(nw, tagnest))
  448.                     {
  449.                         DS(fprintf(stderr, DHLS "nest-tag (%ld)\n", nesting));
  450.                         state = STATE_TAGATTR;
  451.                         nesting++;
  452.                     }
  453.                     /* check, if stop-tag reached */
  454.                     else if (!nesting
  455.                              && strenum(nw, tagstoplist, '|', STEN_NOCASE))
  456.                     {
  457.                         DS(fprintf(stderr, DHLS "stop-tag `%s'\n", nw));
  458.                         set_estr(tagfound, nw);
  459.                         quit = TRUE;
  460.                     }
  461.                     /* check, if commant-tag reached */
  462.                     else if (!strcmp(nw, HSC_COMMENT_STR))
  463.                     {
  464.                         DS(fprintf(stderr, DHLS "comment-tag (0)\n"));
  465.                         state = STATE_COMMENT;
  466.                     }
  467.                     /* any tag; just skip attributes */
  468.                     else
  469.                     {
  470.                         DS(fprintf(stderr, DHLS "any tag\n"));
  471.                         state = STATE_TAGATTR;
  472.                     }
  473.                     break;
  474.                 }
  475.  
  476.             case STATE_ENDTAGNAME:
  477.                 {
  478.                     if (!upstrcmp(nw, tagnest))
  479.                     {
  480.                         if (nesting)
  481.                         {
  482.                             nesting--;
  483.                             DS(fprintf(stderr, DHLS "nest-tag (%ld)\n", nesting));
  484.                         }
  485.                         else
  486.                         {
  487.                             DS(fprintf(stderr, DHLS "nest-tag ending\n"));
  488.                             quit = TRUE;
  489.                         }
  490.                     }
  491.                     else
  492.                         state = STATE_TEXT;     /* no attr for endtag */
  493.  
  494.                     break;
  495.                 }
  496.  
  497.                 /*
  498.                  * process tag attributes
  499.                  */
  500.             case STATE_TAGATTR:
  501.                 {
  502.                     if (!strcmp(nw, "="))
  503.                         state = STATE_TAGATTR_EQ;
  504.                     else if (!strcmp(nw, ">"))
  505.                     {
  506.                         DS(fprintf(stderr, DHLS "back to text\n"));
  507.                         state = STATE_TEXT;
  508.                     }
  509.                     break;
  510.                 }
  511.  
  512.             case STATE_TAGATTR_EQ:
  513.                 {
  514.                     if (!strcmp(nw, "\""))
  515.                     {
  516.                         DS(fprintf(stderr, DHLS "tagarg (double quote)\n"));
  517.                         state = STATE_TAGATTR_DQUOTE;
  518.                     }
  519.                     else if (!strcmp(nw, "'"))
  520.                     {
  521.                         DS(fprintf(stderr, DHLS "tagarg (single quote)\n"));
  522.                         state = STATE_TAGATTR_SQUOTE;
  523.                     }
  524.                     else
  525.                         state = STATE_TAGATTR;
  526.                     break;
  527.                 }
  528.  
  529.             case STATE_TAGATTR_DQUOTE:
  530.                 {
  531.                     if (!strcmp(nw, "\""))
  532.                     {
  533.                         DS(fprintf(stderr, DHLS "end tagarg (double quote)\n"));
  534.                         state = STATE_TAGATTR;
  535.                     }
  536.                     break;
  537.                 }
  538.  
  539.             case STATE_TAGATTR_SQUOTE:
  540.                 {
  541.                     if (!strcmp(nw, "'"))
  542.                     {
  543.                         DS(fprintf(stderr, DHLS "end tagarg (single quote)\n"));
  544.                         state = STATE_TAGATTR;
  545.                     }
  546.                     break;
  547.                 }
  548.  
  549.                 /*
  550.                  * comment processing
  551.                  */
  552.             case STATE_COMMENT:
  553.                 {
  554.                     /* check for <" */
  555.                     if (!strcmp(nw, "<"))
  556.                         state = STATE_COMMENT_TAG;
  557.                     else if (!strcmp(nw, HSC_COMMENT_STR))
  558.                         state = STATE_COMMENT_STAR;
  559.                     break;
  560.                 }
  561.  
  562.             case STATE_COMMENT_TAG:
  563.                 {
  564.                     /* check for comment-nesting */
  565.                     if (!strcmp(nw, HSC_COMMENT_STR))
  566.                     {
  567.                         nesting_comment++;
  568.                         DS(fprintf(stderr, DHLS "comment-tag (%ld)\n", nesting_comment));
  569.                         state = STATE_COMMENT;
  570.                     }
  571.                     else
  572.                         state = STATE_COMMENT;
  573.  
  574.                     break;
  575.                 }
  576.  
  577.             case STATE_COMMENT_STAR:
  578.                 {
  579.                     /* check for end comment */
  580.                     if (!strcmp(nw, ">"))
  581.                         if (nesting_comment)
  582.                         {
  583.                             nesting_comment--;
  584.                             DS(fprintf(stderr, DHLS "end comment-tag (%ld)\n", nesting_comment));
  585.                             state = STATE_COMMENT;
  586.                         }
  587.                         else
  588.                         {
  589.                             DS(fprintf(stderr, DHLS "end comment-tag (%ld)\n", nesting_comment));
  590.                             state = STATE_TEXT;
  591.                         }
  592.                     else
  593.                         state = STATE_COMMENT;
  594.                     break;
  595.                 }
  596.  
  597.                 /*
  598.                  * unhandled tag
  599.                  */
  600.             default:
  601.                 panic("unhandled state");
  602.                 break;
  603.             }
  604.         }
  605.     }
  606.     while (nw && !quit && !(hp->fatal));
  607.  
  608.     if (nw)
  609.     {
  610.         inungets(estr2str(ungetstr), inpf);
  611.     }
  612.     else
  613.     {
  614.         EXPSTR *tagstr = init_estr(0);
  615.  
  616.         set_estr(tagstr, "</");
  617.         app_estr(tagstr, tagnest);
  618.         app_estr(tagstr, "> expected");
  619.         hsc_msg_eof(hp, estr2str(tagstr));
  620.  
  621.         del_estr(tagstr);
  622.     }
  623.  
  624.     del_estr(ungetstr);
  625.  
  626.     return ((BOOL) (nw != NULL));
  627. }
  628.  
  629.